shadow_promote(struct domain *d, unsigned long gpfn, unsigned long gmfn,
unsigned long new_type)
{
- unsigned long min_type, max_type;
struct pfn_info *page = pfn_to_page(gmfn);
int pinned = 0, okay = 1;
}
if ( unlikely(page_is_page_table(page)) )
- {
- min_type = shadow_max_pgtable_type(d, gpfn) + PGT_l1_shadow;
- max_type = new_type;
- }
- else
- {
- min_type = PGT_l1_shadow;
- max_type = PGT_l1_shadow;
- }
- FSH_LOG("shadow_promote gpfn=%p gmfn=%p nt=%p min=%p max=%p",
- gpfn, gmfn, new_type, min_type, max_type);
+ return 1;
- if ( (min_type <= max_type) &&
- !shadow_remove_all_write_access(d, min_type, max_type, gpfn, gmfn) )
+ FSH_LOG("shadow_promote gpfn=%p gmfn=%p nt=%p", gpfn, gmfn, new_type);
+
+ if ( !shadow_remove_all_write_access(d, gpfn, gmfn) )
return 0;
// To convert this page to use as a page table, the writable count
return 0;
}
+#define GPFN_TO_GPTEPAGE(_gpfn) ((_gpfn) / (PAGE_SIZE / sizeof(l1_pgentry_t)))
+static inline unsigned long
+predict_writable_pte_page(struct domain *d, unsigned long gpfn)
+{
+ return __shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), PGT_writable_pred);
+}
+
+static inline void
+increase_writable_pte_prediction(struct domain *d, unsigned long gpfn, unsigned long prediction)
+{
+ unsigned long score = prediction & PGT_score_mask;
+ int create = (score == 0);
+
+ // saturating addition
+ score = (score + (1u << PGT_score_shift)) & PGT_score_mask;
+ score = score ? score : PGT_score_mask;
+
+ prediction = (prediction & PGT_mfn_mask) | score;
+
+ //printk("increase gpfn=%p pred=%p create=%d\n", gpfn, prediction, create);
+ set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
+
+ if ( create )
+ perfc_incr(writable_pte_predictions);
+}
+
+static inline void
+decrease_writable_pte_prediction(struct domain *d, unsigned long gpfn, unsigned long prediction)
+{
+ unsigned long score = prediction & PGT_score_mask;
+ ASSERT(score);
+
+ // divide score by 2... We don't like bad predictions.
+ //
+ score = (score >> 1) & PGT_score_mask;
+
+ prediction = (prediction & PGT_mfn_mask) | score;
+
+ //printk("decrease gpfn=%p pred=%p score=%p\n", gpfn, prediction, score);
+
+ if ( score )
+ set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
+ else
+ {
+ delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred);
+ perfc_decr(writable_pte_predictions);
+ }
+}
+
static u32 remove_all_write_access_in_ptpage(
- struct domain *d, unsigned long pt_mfn, unsigned long readonly_mfn)
+ struct domain *d, unsigned long pt_pfn, unsigned long pt_mfn,
+ unsigned long readonly_gpfn, unsigned long readonly_gmfn,
+ u32 max_refs_to_find, unsigned long prediction)
{
unsigned long *pt = map_domain_mem(pt_mfn << PAGE_SHIFT);
unsigned long match =
- (readonly_mfn << PAGE_SHIFT) | _PAGE_RW | _PAGE_PRESENT;
+ (readonly_gmfn << PAGE_SHIFT) | _PAGE_RW | _PAGE_PRESENT;
unsigned long mask = PAGE_MASK | _PAGE_RW | _PAGE_PRESENT;
int i;
- u32 count = 0;
+ u32 found = 0;
int is_l1_shadow =
((frame_table[pt_mfn].u.inuse.type_info & PGT_type_mask) ==
PGT_l1_shadow);
- for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
+#define MATCH_ENTRY(_i) (((pt[_i] ^ match) & mask) == 0)
+
+ // returns true if all refs have been found and fixed.
+ //
+ int fix_entry(int i)
{
- if ( unlikely(((pt[i] ^ match) & mask) == 0) )
- {
- unsigned long old = pt[i];
- unsigned long new = old & ~_PAGE_RW;
+ unsigned long old = pt[i];
+ unsigned long new = old & ~_PAGE_RW;
- if ( is_l1_shadow &&
- !shadow_get_page_from_l1e(mk_l1_pgentry(new), d) )
- BUG();
+ if ( is_l1_shadow && !shadow_get_page_from_l1e(mk_l1_pgentry(new), d) )
+ BUG();
+ found++;
+ pt[i] = new;
+ if ( is_l1_shadow )
+ put_page_from_l1e(mk_l1_pgentry(old), d);
- count++;
- pt[i] = new;
+#if 0
+ printk("removed write access to pfn=%p mfn=%p in smfn=%p entry %x "
+ "is_l1_shadow=%d\n",
+ readonly_gpfn, readonly_gmfn, pt_mfn, i, is_l1_shadow);
+#endif
- if ( is_l1_shadow )
- put_page_from_l1e(mk_l1_pgentry(old), d);
+ return (found == max_refs_to_find);
+ }
- FSH_LOG("removed write access to mfn=%p in smfn=%p entry %x "
- "is_l1_shadow=%d",
- readonly_mfn, pt_mfn, i, is_l1_shadow);
- }
+ if ( MATCH_ENTRY(readonly_gpfn & (L1_PAGETABLE_ENTRIES - 1)) &&
+ fix_entry(readonly_gpfn & (L1_PAGETABLE_ENTRIES - 1)) )
+ {
+ perfc_incrc(remove_write_fast_exit);
+ increase_writable_pte_prediction(d, readonly_gpfn, prediction);
+ unmap_domain_mem(pt);
+ return found;
+ }
+
+ for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
+ {
+ if ( unlikely(MATCH_ENTRY(i)) && fix_entry(i) )
+ break;
}
unmap_domain_mem(pt);
- return count;
+ return found;
+#undef MATCH_ENTRY
}
int shadow_remove_all_write_access(
- struct domain *d, unsigned min_type, unsigned max_type,
- unsigned long gpfn, unsigned long gmfn)
+ struct domain *d, unsigned long readonly_gpfn, unsigned long readonly_gmfn)
{
int i;
struct shadow_status *a;
- unsigned long sl1mfn = __shadow_status(d, gpfn, PGT_l1_shadow);
- u32 count = 0;
- u32 write_refs;
+ u32 found = 0, fixups, write_refs;
+ unsigned long prediction, predicted_gpfn, predicted_smfn;
ASSERT(spin_is_locked(&d->arch.shadow_lock));
- ASSERT(gmfn);
+ ASSERT(VALID_MFN(readonly_gmfn));
perfc_incrc(remove_write_access);
- if ( (frame_table[gmfn].u.inuse.type_info & PGT_type_mask) ==
+ // If it's not a writable page, then no writable refs can be outstanding.
+ //
+ if ( (frame_table[readonly_gmfn].u.inuse.type_info & PGT_type_mask) !=
PGT_writable_page )
{
- write_refs = (frame_table[gmfn].u.inuse.type_info & PGT_count_mask);
- if ( write_refs &&
- (frame_table[gmfn].u.inuse.type_info & PGT_pinned) )
- write_refs--;
- if ( write_refs == 0 )
+ perfc_incrc(remove_write_not_writable);
+ return 1;
+ }
+
+ // How many outstanding writable PTEs for this page are there?
+ //
+ write_refs = (frame_table[readonly_gmfn].u.inuse.type_info & PGT_count_mask);
+ if ( write_refs && (frame_table[readonly_gmfn].u.inuse.type_info & PGT_pinned) )
+ write_refs--;
+
+ if ( write_refs == 0 )
+ {
+ perfc_incrc(remove_write_no_work);
+ return 1;
+ }
+
+ // Before searching all the L1 page tables, check the typical culprit first.
+ //
+ if ( (prediction = predict_writable_pte_page(d, readonly_gpfn)) )
+ {
+ predicted_gpfn = prediction & PGT_mfn_mask;
+ if ( (predicted_smfn = __shadow_status(d, predicted_gpfn, PGT_l1_shadow)) &&
+ (fixups = remove_all_write_access_in_ptpage(d, predicted_gpfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, prediction)) )
+ {
+ found += fixups;
+ if ( found == write_refs )
+ {
+ perfc_incrc(remove_write_predicted);
+ return 1;
+ }
+ }
+ else
{
- perfc_incrc(remove_write_access_easy);
- return 1;
+ perfc_incrc(remove_write_bad_prediction);
+ decrease_writable_pte_prediction(d, readonly_gpfn, prediction);
}
}
+ // Search all the shadow L1 page tables...
+ //
for (i = 0; i < shadow_ht_buckets; i++)
{
a = &d->arch.shadow_ht[i];
while ( a && a->gpfn_and_flags )
{
- if ( ((a->gpfn_and_flags & PGT_type_mask) >= min_type) &&
- ((a->gpfn_and_flags & PGT_type_mask) <= max_type) )
+ if ( (a->gpfn_and_flags & PGT_type_mask) == PGT_l1_shadow )
{
- switch ( a->gpfn_and_flags & PGT_type_mask )
- {
- case PGT_l1_shadow:
- count +=
- remove_all_write_access_in_ptpage(d, a->smfn, gmfn);
- if ( count == write_refs )
- return 1;
- break;
- case PGT_l2_shadow:
- if ( sl1mfn )
- count +=
- remove_all_write_access_in_ptpage(d, a->smfn,
- sl1mfn);
- if ( count == write_refs )
- return 1;
- break;
- case PGT_hl2_shadow:
- // nothing to do here...
- break;
- default:
- // need to flush this out for 4 level page tables.
- BUG();
- }
+ found += remove_all_write_access_in_ptpage(d, a->gpfn_and_flags & PGT_mfn_mask, a->smfn, readonly_gpfn, readonly_gmfn, write_refs - found, a->gpfn_and_flags & PGT_mfn_mask);
+ if ( found == write_refs )
+ return 1;
}
+
a = a->next;
}
}
FSH_LOG("%s: looking for %d refs, found %d refs\n",
- __func__, write_refs, count);
+ __func__, write_refs, found);
return 0;
}
return count;
}
-u32 shadow_remove_all_access(struct domain *d, unsigned long gmfn)
+u32 shadow_remove_all_access(struct domain *d, unsigned long forbidden_gmfn)
{
int i;
struct shadow_status *a;
a = &d->arch.shadow_ht[i];
while ( a && a->gpfn_and_flags )
{
- if ( ((a->gpfn_and_flags & PGT_type_mask) == PGT_l1_shadow) ||
- ((a->gpfn_and_flags & PGT_type_mask) == PGT_hl2_shadow) )
+ switch (a->gpfn_and_flags & PGT_type_mask)
{
- count += remove_all_access_in_page(d, a->smfn, gmfn);
+ case PGT_l1_shadow:
+ case PGT_l2_shadow:
+ case PGT_l3_shadow:
+ case PGT_l4_shadow:
+ case PGT_hl2_shadow:
+ count += remove_all_access_in_page(d, a->smfn, forbidden_gmfn);
+ break;
+ case PGT_snapshot:
+ case PGT_writable_pred:
+ // these can't hold refs to the forbidden page
+ break;
+ default:
+ BUG();
}
+
a = a->next;
}
}
BUG(); // XXX - ought to fix this...
break;
case PGT_snapshot:
+ case PGT_writable_pred:
break;
default:
errors++;
struct shadow_status {
unsigned long gpfn_and_flags; /* Guest pfn plus flags. */
- struct shadow_status *next; /* Pull-to-front list. */
+ struct shadow_status *next; /* Pull-to-front list per hash bucket. */
unsigned long smfn; /* Shadow mfn. */
+
+ // Pull-to-front list of L1s/L2s from which we check when removing
+ // write access to a page.
+ //struct list_head next_to_check;
};
#define shadow_ht_extra_size 128
else
mfn = __gpfn_to_mfn(d, pfn);
- if ( VALID_MFN(mfn) )
+ if ( VALID_MFN(mfn) && (mfn < max_page) )
hl2e = (mfn << PAGE_SHIFT) | __PAGE_HYPERVISOR;
}
perfc_value(shadow_l1_pages) +
perfc_value(shadow_l2_pages) +
perfc_value(hl2_table_pages) +
- perfc_value(snapshot_pages)
+ perfc_value(snapshot_pages) +
+ perfc_value(writable_pte_predictions)
) - live;
#ifdef PERF_COUNTERS
if ( (abs < -1) || (abs > 1) )
{
- printk("live=%d free=%d l1=%d l2=%d hl2=%d snapshot=%d\n",
+ printk("live=%d free=%d l1=%d l2=%d hl2=%d snapshot=%d writable_ptes=%d\n",
live, free,
perfc_value(shadow_l1_pages),
perfc_value(shadow_l2_pages),
perfc_value(hl2_table_pages),
- perfc_value(snapshot_pages));
+ perfc_value(snapshot_pages),
+ perfc_value(writable_pte_predictions));
BUG();
}
#endif
ASSERT(gpfn == (gpfn & PGT_mfn_mask));
ASSERT(stype && !(stype & ~PGT_type_mask));
- if ( VALID_MFN(gmfn) &&
- ((stype != PGT_snapshot)
- ? !mfn_is_page_table(gmfn)
- : !mfn_out_of_sync(gmfn)) )
+ if ( VALID_MFN(gmfn) && (gmfn < max_page) &&
+ (stype != PGT_writable_pred) &&
+ ((stype == PGT_snapshot)
+ ? !mfn_out_of_sync(gmfn)
+ : !mfn_is_page_table(gmfn)) )
{
perfc_incrc(shadow_status_shortcut);
+#ifndef NDEBUG
ASSERT(___shadow_status(d, gpfn, stype) == 0);
+
+ // Undo the effects of the above ASSERT on ___shadow_status()'s perf
+ // counters.
+ //
+ perfc_decrc(shadow_status_calls);
+ perfc_decrc(shadow_status_miss);
+#endif
return 0;
}
{
type = x->gpfn_and_flags & PGT_type_mask;
- // Treat an HL2 as if it's an L1
- //
- if ( type == PGT_hl2_shadow )
+ switch ( type )
+ {
+ case PGT_hl2_shadow:
+ // Treat an HL2 as if it's an L1
+ //
type = PGT_l1_shadow;
-
- // Ignore snapshots -- they don't in and of themselves constitute
- // treating a page as a page table
- //
- if ( type == PGT_snapshot )
+ break;
+ case PGT_snapshot:
+ case PGT_writable_pred:
+ // Ignore snapshots -- they don't in and of themselves constitute
+ // treating a page as a page table
+ //
goto next;
-
- // Early exit if we found the max possible value
- //
- if ( type == PGT_base_page_table )
+ case PGT_base_page_table:
+ // Early exit if we found the max possible value
+ //
return type;
+ default:
+ break;
+ }
if ( type > pttype )
pttype = type;
found:
// release ref to page
- put_page(pfn_to_page(gmfn));
+ if ( stype != PGT_writable_pred )
+ put_page(pfn_to_page(gmfn));
shadow_audit(d, 0);
}
int i;
unsigned long key = gpfn | stype;
- SH_VVLOG("set gpfn=%p gmfn=%p smfn=%p t=%p\n", gpfn, gmfn, smfn, stype);
+ SH_VVLOG("set gpfn=%p gmfn=%p smfn=%p t=%p", gpfn, gmfn, smfn, stype);
ASSERT(spin_is_locked(&d->arch.shadow_lock));
ASSERT(shadow_mode_translate(d) || gpfn);
ASSERT(!(gpfn & ~PGT_mfn_mask));
-
- ASSERT(pfn_is_ram(gmfn)); // XXX need to be more graceful
- ASSERT(smfn && !(smfn & ~PGT_mfn_mask));
+
+ // XXX - need to be more graceful.
+ ASSERT(VALID_MFN(gmfn));
+
ASSERT(stype && !(stype & ~PGT_type_mask));
x = head = hash_bucket(d, gpfn);
// grab a reference to the guest page to represent the entry in the shadow
// hash table
//
- get_page(pfn_to_page(gmfn), d);
+ // XXX - Should PGT_writable_pred grab a page ref?
+ // - Who/how are these hash table entry refs flushed if/when a page
+ // is given away by the domain?
+ //
+ if ( stype != PGT_writable_pred )
+ get_page(pfn_to_page(gmfn), d);
/*
* STEP 1. If page is already in the table, update it in place.
*/
do
{
- if ( x->gpfn_and_flags == key )
+ if ( unlikely(x->gpfn_and_flags == key) )
{
- BUG();
+ if ( stype != PGT_writable_pred )
+ BUG(); // we should never replace entries into the hash table
x->smfn = smfn;
+ put_page(pfn_to_page(gmfn)); // already had a ref...
goto done;
}
done:
shadow_audit(d, 0);
+
+ if ( stype <= PGT_l4_shadow )
+ {
+ // add to front of list of pages to check when removing write
+ // permissions for a page...
+ //
+ }
}
/************************************************************************/